#!/bin/bash
##########################################################################################
#                           Can Pollution Markets Work in Developing Countries? 
#                                    Experimental Evidence from India
#                                       
#                                          Main replication file
#                      Michael Greenstone, Rohini Pande, Nicholas Ryan & Anant Sudarshan
#
#
#   README: This is the project level master file for replicating the exhibits that appear in the manuscript. 
#   Before executing this script you need to ensure that all software dependencies are resolved. 
#       
#       1) Stata 15 with terminal utility [ "stata" should be on PATH ]
#       2) R and RStudio [ "Rscript" should be on PATH]
#       3) MATLAB 2019a or higher [ "matlab -batch" should be on PATH ] 
#       4) Bash or another POSIX compliant Unix shell [ Mac/Linux users can use the default shell;
#                                                       Windows users can use the Linux subsystem ]
#
#   Directory structure:
#   00RawData - All raw data inputs to the code pipeline.
#   01Code - All code utilized in the pipeline.
#   02DataPipeline - Datasets generated during run.
#   03Output - Tables and figures generated by the pipeline.
#   04Wrapper - Contains a document all_exhibits.tex that includes all exhibits produced in the pipeline.
#   It also includes a few exhibits that are produced by hand in 04Wrapper/ExhibitsGeneratedByHand. 
#   05Logs - Stata, R, and MATLAB logs generated by the pipeline. 
#
#   Runtime requirements:
#   The pipeline should take fewer than 10 minutes to run. The emissions analysis and model running 
#   take the largest share of this time. 
#
##########################################################################################

clear
###################################### Make folder structure ######################################
# dir_struct.txt lists the directories to create; mkdir -p leaves existing ones untouched.
# printf is used instead of echo because bash's builtin echo prints "\n" literally.
printf 'Making directory structure...'
if xargs mkdir -p < dir_struct.txt; then
    printf 'Done\n\n'
else
    # Report the failure instead of claiming success; later steps write into these folders.
    printf '\nERROR: could not create the directory structure listed in dir_struct.txt.\n\n'
fi

###################################### User inputs ######################################
# Language switches: 1 runs the steps written in that language, 0 skips them.
Run_R=1
Run_MATLAB=1
Run_Stata=1

# Package-installation switches: 1 installs the required packages, 0 does not.
# Note: a missing package causes an error later in the run. To install packages
# by hand instead, check the README for the list of required packages.
Install_R_packages=1
Install_Stata_packages=1

# Clean-build switch: 1 requests a clean build, 0 does not.
Clean_build=0


###################################### Software checks ######################################
#### Check to ensure Stata, MATLAB, and R are on the path 
echo "Checking if software can be called from the command line..."

# Print the name of the first Stata executable found on PATH.
# Candidates are tried in the fixed order below; plain "stata" is the last resort.
# Outputs: the executable name on stdout.
# Returns: 0 when an executable was found, 1 otherwise (nothing is printed).
find_stata_executable () {
    local candidate
    for candidate in stata-mp stata-se Stata StataMP-64 StataSE-64 StataBE-64 \
                     StataMP StataSE StataBE stata; do
        if [ -x "$(command -v "$candidate")" ]; then
            printf '%s\n' "$candidate"
            return 0
        fi
    done
    return 1
}

# Check Stata
# Sets Stata_status (1 = usable, 0 = unavailable or not requested) and, when
# found, stata (the executable name later steps invoke).
if [[ "$Run_Stata" -eq 1 ]]; then
    printf 'Checking Stata...'
    if stata=$(find_stata_executable); then
        Stata_status=1
        echo "Stata is accessible."
    else
        Stata_status=0
        # %b expands the \n and \t escapes, which bash's builtin echo would print literally.
        printf '%b\n' "\nERROR: Stata is not accessible from the command line.\n\tFigures depending upon Stata won't be produced.\n\tPlease ensure:\n\t\t1. Stata is installed.\n\t\t2. Stata is callable from the command line.\n\t\tSee https://www.stata.com/support/faqs/mac/advanced-topics/#startup.\n"
    fi
else
    Stata_status=0
fi

# Check MATLAB
# Sets MATLAB_status to 1 when MATLAB was requested and "matlab" resolves on
# PATH, and to 0 otherwise.
if [[ "$Run_MATLAB" -eq 1 ]]; then
    printf 'Checking MATLAB...'
    if command -v matlab >/dev/null 2>&1; then
        MATLAB_status=1
        echo "MATLAB is accessible."
    else
        MATLAB_status=0
        # %b expands the \n and \t escapes, which bash's builtin echo would print literally.
        printf '%b\n' "\nERROR: MATLAB is not accessible from the command line.\n\tFigures depending upon MATLAB won't be produced.\n\tPlease ensure:\n\t\t1. MATLAB is installed.\n\t\t2. MATLAB is callable from the command line.\n\t\tSee https://stackoverflow.com/questions/33187141/how-to-call-matlab-script-from-command-line."
    fi
else
    MATLAB_status=0
fi

# Check R
# Sets R_status to 1 when R was requested and an executable "Rscript" resolves
# on PATH, and to 0 otherwise.
if [[ "$Run_R" -eq 1 ]]; then
    printf 'Checking R...'
    if [ -x "$(command -v Rscript)" ]; then
        R_status=1
        echo "R is accessible."
    else
        R_status=0
        # %b expands the \n and \t escapes, which bash's builtin echo would print literally.
        printf '%b\n' "\nERROR: R is not accessible from the command line.\n\tFigures depending upon R won't be produced.\n\tPlease ensure:\n\t\t1. R is installed.\n\t\t2. R is callable from the command line.\n\t\tSee https://stackoverflow.com/questions/18306362/run-r-script-from-command-line.\n"
    fi
else
    R_status=0
fi

# Compare the number of languages requested with the number actually found.
# If any requested language is missing, point the user to the manual route.
num_check=$((Run_Stata + Run_MATLAB + Run_R))
num_success=$((Stata_status + MATLAB_status + R_status))
if [[ "$num_success" -lt "$num_check" ]]; then
    # %b expands the \n and \t escapes, which bash's builtin echo would print literally.
    printf '%b\n' "One or more software languages is not accessible, but may be present on your computer.\nResults can still be replicated by following 'Option 2' outlined in README.md.\nIf you choose to take this route, do the following:\n\t1. Set the working directory to the location of the replication folder when running the code.\n\t2. In R install: tidyverse, haven, readxl, janitor, kableExtra, viridis, ggrepel, ggpattern, modelsummary, fixest, modelr, cowplot, gridExtra, and grid \n\t3. In Stata install: reghdfe, xtoverid, estout, mvsumm, grstyle, and listtex.\n"
fi

################################ CLEAR THE OUTPUT-SPACE ###########################
# When Clean_build is 1, delete every regular file under 03Output/ and
# 02DataPipeline/ (directories are kept) so the run starts from an empty state.
if [[ "$Clean_build" -eq 1 ]]; then

    echo "Clearing the output space"
    sleep 1

    # Confirm we are in the right directory before deleting anything
    if [[ "${PWD##*/}" != "AnonymizedETSReplication" ]]; then
        echo "Error: You are not in the correct directory!"
        exit 1  # Exit with a non-zero status to indicate an error
    fi

    # Clean out exhibits and intermediate data.
    # -exec ... + never invokes rm with an empty operand list, unlike piping to xargs
    # when a directory holds no files; -f keeps the removal non-interactive.
    find 03Output/ -type f -exec rm -f -- {} +
    find 02DataPipeline/ -type f -exec rm -f -- {} +

    echo "Done"
    sleep 2
    # %b expands the \n escapes, which bash's builtin echo would print literally.
    printf '%b\n' "-----------------------------\n\n\n\n\n"

fi

###################################### Helper function ######################################
# Stop the pipeline when a log file contains an error marker.
# Arguments:
#   $1 - grep pattern (basic regular expression) identifying an error line
#   $2 - path of the log file to scan
# Outputs: on a match, the matching line(s) and the log path on stdout.
# Returns: 0 when nothing matches; 2 (with a warning on stderr) when the log
#          cannot be read. Exits the whole script with status 1 on a match.
check_for_error () {
    local pattern=$1
    local log_file=$2
    local error

    # An unreadable log cannot be vetted; say so rather than passing silently.
    if [[ ! -r "$log_file" ]]; then
        printf 'Warning: cannot read log %s; unable to check it for errors.\n' "$log_file" >&2
        return 2
    fi

    # The pattern is quoted so the shell cannot glob-expand it (e.g. "Error*")
    # against files in the working directory before grep sees it.
    error=$(grep -e "$pattern" -- "$log_file")
    if [[ -n "$error" ]]; then
        echo "Error: ${error}"
        echo "Check log: ${log_file}"
        exit 1
    fi
}

###################################### Package installation ######################################
#### Install required STATA packages ####
# Install only when requested AND Stata is usable: the ternary below takes the
# smaller of the two 0/1 flags.
Install_Stata_packages=$(( Install_Stata_packages <= Stata_status ? Install_Stata_packages : Stata_status ))
if [[ "$Install_Stata_packages" -eq 1 ]]; then
    # printf rather than echo -n: bash's builtin echo would print the "\n" literally.
    printf '\nInstalling required STATA packages...'
    # Fall back to plain "stata" when no specific executable name was recorded.
    "${stata:-stata}" -e do 01Code/installers/install_Stata_packages.do 
    # The batch log is picked up from the working directory and filed under 05Logs.
    mv install_Stata_packages.log "05Logs/Install_Packages_Stata.log"
    check_for_error "r([0-9]*)" "05Logs/Install_Packages_Stata.log"
    echo "Done"
else 
    echo "Skipped Stata package installation"
fi

#### Install required R packages ####
# Same rule as above: requested AND R usable.
Install_R_packages=$(( Install_R_packages <= R_status ? Install_R_packages : R_status ))
if [[ "$Install_R_packages" -eq 1 ]]; then
    printf 'Installing required R packages...'
    # stdout and stderr both go to the log that is scanned afterwards.
    Rscript 01Code/installers/install_R_packages.R &> "05Logs/Install_Packages_R.log"
    check_for_error "Error*" "05Logs/Install_Packages_R.log"
    echo "Done"
else 
    echo "Skipped R package installation"
fi

###################################### Run pipeline ######################################
# Run one Stata do-file in batch mode, move its log to the given path, and scan it.
# Arguments:
#   $1 - step label used in the progress messages
#   $2 - path of the do-file to run
#   $3 - destination path for the log
# Globals read: Stata_status (step is skipped unless it is 1), stata (executable name).
# Exits the script via check_for_error when the log contains a Stata return code.
run_stata_step () {
    local label=$1
    local do_file=$2
    local log_file=$3
    local batch_log

    if [[ "$Stata_status" -ne 1 ]]; then
        echo "Skipped ${label} (Stata)"
        return 0
    fi

    printf '%s' "Running ${label} (Stata)..."
    # Fall back to plain "stata" when no specific executable name was recorded.
    "${stata:-stata}" -e do "$do_file"
    # The batch log is picked up from the working directory under the do-file's base name.
    batch_log=${do_file##*/}
    batch_log="${batch_log%.do}.log"
    mv "$batch_log" "$log_file"
    check_for_error "r([0-9]*)" "$log_file"
    echo "Done"
}

# Run one R script with Rscript, capturing stdout and stderr in a log, and scan it.
# Arguments:
#   $1 - step label used in the progress messages
#   $2 - path of the R script to run
#   $3 - path of the log to write
# Globals read: R_status (step is skipped unless it is 1).
# Exits the script via check_for_error when the log contains an error marker.
run_r_step () {
    local label=$1
    local r_script=$2
    local log_file=$3

    if [[ "$R_status" -ne 1 ]]; then
        echo "Skipped ${label} (R)"
        return 0
    fi

    printf '%s' "Running ${label} (R)..."
    Rscript "$r_script" &> "$log_file"
    check_for_error "Error*" "$log_file"
    echo "Done"
}

##### Step 1. Run 01Code/emissions/00MainStata.do #####
run_stata_step "Emissions Analysis" 01Code/emissions/00MainStata.do "05Logs/Emissions_Stata.log"

##### Step 2. Run 01Code/emissions/00MainR.R #####
run_r_step "Emissions Analysis" 01Code/emissions/00MainR.R "05Logs/Emissions_R.log"

#### Trading ####
##### Step 3. Run 01Code/trading/00MainStata.do #####
run_stata_step "Trading Analysis" 01Code/trading/00MainStata.do "05Logs/Trading_Stata.log"

#### Phone Survey ####
##### Step 4. Run 01Code/phone_survey/00MainStata.do #####
run_stata_step "Phone Survey Analysis" 01Code/phone_survey/00MainStata.do "05Logs/Phone_Survey_Stata.log"

#### Model ####
##### Step 5. Run 01Code/model/cleaning/00MainStata.do #####
run_stata_step "Model Prep" 01Code/model/cleaning/00MainStata.do "05Logs/Model_Stata.log"

##### Step 6. Run 01Code/model/matlab/main_caller.m #####
if [[ "$MATLAB_status" -eq 1 ]]; then
    printf '%s' "Running Model (Matlab)..."
    # Console output is discarded; the log scanned below is not written by this command.
    # NOTE(review): presumably main_caller.m writes 05Logs/Model_Matlab.log and exits
    # MATLAB itself, since -r (unlike the -batch named in the header) is used - confirm.
    matlab  -nodisplay -nosplash -nodesktop -r "run([pwd '/01Code/model/matlab/main_caller.m'])" &> /dev/null 
    check_for_error "ERROR*" "05Logs/Model_Matlab.log"
    echo "Done"
else 
    echo "Skipped Model (Matlab)"
fi

#### Trading using model data ####
##### Step 7. Run 01Code/trading/00MainR.R #####
run_r_step "Trading Analysis" 01Code/trading/00MainR.R "05Logs/Trading_R.log"